import scrapy

from scrapy.selector import Selector
from movie.items import MovieItem


class MovieSpider(scrapy.Spider):
    """Scrape movie detail pages linked from the ygdy8.net listing pages.

    ``parse`` walks a listing page, schedules one request per linked detail
    page and follows the pagination link; ``parse_datail`` converts the
    labelled text lines of a detail page into a dict of item fields.
    """

    name = "movie"

    start_urls = ['http://www.ygdy8.net/html/gndy/dyzz/list_23_1.html']

    # (label at the start of a line, item field, index where the value starts).
    # The value index skips the label plus the separator that follows it.
    _FIELD_LABELS = (
        ('◎译\u3000\u3000名', 'm_yiming', 6),    # translated title
        ('◎片\u3000\u3000名', 'm_pianming', 6),  # original title
        ('◎年\u3000\u3000代', 'm_time', 6),      # year
        ('◎产\u3000\u3000地', 'm_chandi', 6),    # country of origin
        ('◎类\u3000\u3000别', 'm_leibie', 6),    # genre
        ('◎语\u3000\u3000言', 'm_yuyan', 6),     # language
        ('◎字\u3000\u3000幕', 'm_zimu', 6),      # subtitles
        ('◎上映日期', 'm_syrq', 6),              # release date
        ('◎IMDb评分', 'imdb_src', 9),            # IMDb rating
        ('◎豆瓣评分', 'douban_src', 6),          # Douban rating
        ('◎文件格式', 'wjgs', 6),                # file format
        ('◎视频尺寸', 'plet', 6),                # video dimensions
        ('◎文件大小', 'm_size', 6),              # file size
        ('◎片\u3000\u3000长', 'm_pc', 6),        # running time
        ('◎导\u3000\u3000演', 'm_dy', 6),        # director
    )

    # Label of the cast line; further cast members follow on indented lines.
    _ACTOR_LABEL = '◎主\u3000\u3000演'
    # Leading ideographic spaces that mark a wrapped (continuation) line.
    _CONTINUATION_PREFIX = '\u3000\u3000\u3000\u3000'
    # Index where the value starts on the cast line and its continuations.
    _ACTOR_VALUE_START = 6

    def parse(self, response):
        """Yield a detail-page request per listed movie, then the next page.

        Each request carries its own fresh ``MovieItem`` in ``meta['item']``
        so that fields scraped for one movie can never leak into another.
        """
        rows = response.xpath(
            '//div[@class="co_content8"]//ul//table[@class="tbspan"]'
        )
        for row in rows:
            href = row.xpath('.//tr[2]/td[2]/b/a/@href').extract_first()
            if href is None:
                # No detail link in this table; skip it instead of crashing
                # on ``str + None`` and losing the rest of the page.
                continue
            yield scrapy.Request(
                url="http://www.ygdy8.net" + href,
                meta={'item': MovieItem()},
                callback=self.parse_datail,
                dont_filter=True,
            )

        # NOTE(review): the 7th anchor in the pager is presumably the
        # "next page" link -- confirm against the live markup.
        next_href = response.xpath('//div[@class="x"]//a[7]/@href').extract_first()
        if next_href is not None:
            yield response.follow(
                "http://www.ygdy8.net/html/gndy/dyzz/" + next_href,
                callback=self.parse,
            )
        else:
            print('采集完成')

    def parse_datail(self, response):
        """Fill the item from ``response.meta['item']`` and yield it as a dict.

        Every text node under ``div#Zoom//p`` is examined in document order.
        A line starting with a known label stores the remainder of the line in
        the matching item field. The cast line may wrap: indented lines that
        directly follow it are appended to the cast, separated by newlines.
        Indented lines following any other label are ignored rather than being
        mistaken for cast members. The cast is always stored in ``m_actor``
        (empty string when the page has no cast line).
        """
        item = response.meta['item']
        lines = response.xpath('//div[@id="Zoom"]//p/text()').extract()

        actor_lines = []
        in_actor_block = False
        for line in lines:
            if line.startswith('◎'):
                # Any new label ends a running cast block.
                in_actor_block = line.startswith(self._ACTOR_LABEL)
                if in_actor_block:
                    actor_lines = [line[self._ACTOR_VALUE_START:]]
                    continue
                for label, field, value_start in self._FIELD_LABELS:
                    if line.startswith(label):
                        item[field] = line[value_start:]
                        break
            elif in_actor_block and line.startswith(self._CONTINUATION_PREFIX):
                actor_lines.append(line[self._ACTOR_VALUE_START:])

        item['m_actor'] = '\n'.join(actor_lines)
        yield dict(item)